Acute Changes in CEWL as a Function of Temperature: Data Wrangling

Calvin Davis, Savannah Weaver

2022

Packages

`%nin%` = Negate(`%in%`)
if (!require("tidyverse")) install.packages("tidyverse") 
library("tidyverse") #tidyr, ggplot, dplyr, etc
if (!require("RColorBrewer")) install.packages("RColorBrewer")
library("RColorBrewer") # plot colors
if (!require("rmdformats")) install.packages("rmdformats")
library("rmdformats") # clean html R markdown format

Background

This code walks through the data wrangling for the analysis of a study on the rate of change of cutaneous evaporative water loss (CEWL) in response to temperature change in Western Fence Lizards (Sceloporus occidentalis) (published in the Journal __ in 2023). Data was collected and analyzed by Calvin Davis and Savannah Weaver, under the advising of Dr. Emily Taylor at California Polytechnic State University.

Load Data

Collection Data

Variables:

  • date = date of capture
  • time_capture = time lizards were captured
  • individual ID for each lizard
  • dorsal couple = label of which dorsal surface thermocouple was used
  • cloacal couple = label of which thermocouple was inserted into cloaca
  • dorsal/cloacal lead port = which port thermocouples were plugged into the thermologger
  • time in chamber = time lizards were placed in incubation chambers
  • time out chamber = time lizards were removed from incubation chamber
  • chamber time elapsed min = amount of time lizards spent in incubation chamber in minutes
  • chamber time elapsed hr = amount of time lizards spent in incubation chamber in hours

Load in data, format, and calculate additional metrics.

collection_data <- read.csv("./Data/Collection Data.csv", 
                            header = TRUE,
                            fileEncoding="UTF-8-BOM") %>%  #removes junk characters for first column name
   mutate(date_time_capture = (as.POSIXct(paste(date, time_capture), 
                                           format = "%d/%m/%Y %H:%M")),
          rounded_date_time = round.POSIXt(date_time_capture, units = "hours"),
          date = as.Date(date, format = "%d/%m/%Y"),
          time_capture = as.POSIXct(time_capture, format = "%H:%M"),
          individual_ID = as.factor(individual_ID),
          treatment = as.factor(treatment),
          dorsal_couple = as.factor(dorsal_couple),
          dorsal_lead_port = as.factor(dorsal_lead_port),
          cloacal_couple = as.factor(cloacal_couple),
          cloacal_lead_port = as.factor(cloacal_lead_port),
          time_in_chamber = as.POSIXct(time_in_chamber, 
                                       format = "%H:%M"),
          hold_time_hr = (as.numeric(time_in_chamber - time_capture))/60,
          time_out_chamber = as.POSIXct(time_out_chamber, 
                                        format = "%H:%M"),
          chamber_time_elapsed_min = as.numeric(time_out_chamber - time_in_chamber),
          chamber_time_elapsed_hr = chamber_time_elapsed_min/60,
          # add estimated "chamber time" for control lizards
          chamber_time_elapsed_hr = (case_when(treatment == 'Control' ~ 1,
                                              treatment != 'Control' ~ chamber_time_elapsed_hr)) # used as.factor to check that elapsed time = 1hr for 8336 obs of control lizards
          )
summary(collection_data)
##       date             time_capture                        sock          
##  Min.   :2021-10-05   Min.   :2023-01-12 10:10:00.00   Length:39         
##  1st Qu.:2021-10-11   1st Qu.:2023-01-12 10:38:00.00   Class :character  
##  Median :2021-10-12   Median :2023-01-12 11:06:00.00   Mode  :character  
##  Mean   :2021-10-13   Mean   :2023-01-12 11:08:24.61                     
##  3rd Qu.:2021-10-18   3rd Qu.:2023-01-12 11:20:30.00                     
##  Max.   :2021-10-19   Max.   :2023-01-12 16:00:00.00                     
##                                                                          
##  individual_ID   treatment      mass_g          SVL_mm     
##  401    : 1    Control:12   Min.   : 8.70   Min.   :60.00  
##  402    : 1    Cooling:14   1st Qu.: 9.90   1st Qu.:63.00  
##  403    : 1    Heating:13   Median :11.20   Median :65.00  
##  404    : 1                 Mean   :11.32   Mean   :66.03  
##  405    : 1                 3rd Qu.:12.50   3rd Qu.:68.50  
##  406    : 1                 Max.   :15.30   Max.   :75.00  
##  (Other):33                                                
##  time_in_chamber                  time_out_chamber              dorsal_couple
##  Min.   :2023-01-12 12:28:00.00   Min.   :2023-01-12 13:35:00   A      :8    
##  1st Qu.:2023-01-12 13:16:30.00   1st Qu.:2023-01-12 14:28:00   D      :7    
##  Median :2023-01-12 14:09:00.00   Median :2023-01-12 15:15:00   B      :5    
##  Mean   :2023-01-12 14:14:31.10   Mean   :2023-01-12 15:23:00   F      :5    
##  3rd Qu.:2023-01-12 15:11:30.00   3rd Qu.:2023-01-12 16:27:30   H      :5    
##  Max.   :2023-01-12 16:36:00.00   Max.   :2023-01-12 17:33:00   E      :4    
##  NA's   :12                       NA's   :12                    (Other):5    
##  dorsal_lead_port cloacal_couple cloacal_lead_port    notes          
##  T2:39            E      :10     T1:39             Length:39         
##                   F      : 8                       Class :character  
##                   B      : 5                       Mode  :character  
##                   C      : 5                                         
##                   D      : 5                                         
##                   A      : 3                                         
##                   (Other): 3                                         
##  date_time_capture             rounded_date_time                 hold_time_hr  
##  Min.   :2021-10-05 10:10:00   Min.   :2021-10-05 10:00:00.00   Min.   :0.600  
##  1st Qu.:2021-10-11 10:31:30   1st Qu.:2021-10-11 11:00:00.00   1st Qu.:2.058  
##  Median :2021-10-12 11:03:00   Median :2021-10-12 11:00:00.00   Median :2.983  
##  Mean   :2021-10-13 11:45:20   Mean   :2021-10-13 11:46:09.23   Mean   :2.958  
##  3rd Qu.:2021-10-18 11:22:00   3rd Qu.:2021-10-18 11:00:00.00   3rd Qu.:3.792  
##  Max.   :2021-10-19 11:58:00   Max.   :2021-10-19 12:00:00.00   Max.   :5.583  
##                                                                 NA's   :12     
##  chamber_time_elapsed_min chamber_time_elapsed_hr
##  Min.   :56.00            Min.   :0.9333         
##  1st Qu.:62.00            1st Qu.:1.0000         
##  Median :66.00            Median :1.0333         
##  Mean   :68.48            Mean   :1.0979         
##  3rd Qu.:74.50            3rd Qu.:1.1417         
##  Max.   :92.00            Max.   :1.5333         
##  NA's   :12

Collection Stats for Permitting

permit_stats <- collection_data %>%
   group_by(date) %>%
   summarise(n = n())
permit_stats
## # A tibble: 5 × 2
##   date           n
##   <date>     <int>
## 1 2021-10-05     8
## 2 2021-10-11     8
## 3 2021-10-12     7
## 4 2021-10-18     8
## 5 2021-10-19     8

Mean Values by Tmt

means <- collection_data %>%
  group_by(treatment) %>%
  summarise(mean(mass_g),
            sd(mass_g),
            mean(SVL_mm),
            sd(SVL_mm),
            mean(chamber_time_elapsed_min))
means
## # A tibble: 3 × 6
##   treatment `mean(mass_g)` `sd(mass_g)` `mean(SVL_mm)` `sd(SVL_mm)` mean(chamb…¹
##   <fct>              <dbl>        <dbl>          <dbl>        <dbl>        <dbl>
## 1 Control             11.4         1.49           66.8         2.77         NA  
## 2 Cooling             11.3         1.77           66.1         4.35         72.1
## 3 Heating             11.3         2.06           65.2         3.61         64.5
## # … with abbreviated variable name ¹​`mean(chamber_time_elapsed_min)`
aov_mass <- aov(data = collection_data, mass_g ~ treatment)
summary(aov_mass)
##             Df Sum Sq Mean Sq F value Pr(>F)
## treatment    2   0.07   0.035   0.011  0.989
## Residuals   36 116.23   3.229
TukeyHSD(aov_mass)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mass_g ~ treatment, data = collection_data)
## 
## $treatment
##                        diff       lwr      upr     p adj
## Cooling-Control -0.09761905 -1.825449 1.630211 0.9895441
## Heating-Control -0.08333333 -1.841567 1.674901 0.9926295
## Heating-Cooling  0.01428571 -1.677382 1.705954 0.9997651

CEWL Data

First, need to get the start times for each AquaFlux run:

# load data
AF_starts_wide <- read.csv("./Data/AquaFlux start times.csv",
                           fileEncoding="UTF-8-BOM")
# reorganize
rownames(AF_starts_wide) <- AF_starts_wide$Probe
AF_starts_wide$Probe <- NULL
AF_starts_tidy <- as.data.frame(t(as.matrix(AF_starts_wide))) %>%
   mutate(lizard_ID = as.factor(Comments),
          date_time_start = as.POSIXct(paste(date,time_start), 
                                       format = "%m/%d/%y %I:%M:%S %p"),
          date = as.Date(date, format = "%m/%d/%y")
          ) %>%
   dplyr::select(lizard_ID, date, date_time_start)
summary(AF_starts_tidy)
##    lizard_ID       date            date_time_start                 
##  401    : 1   Min.   :2021-10-05   Min.   :2021-10-05 13:21:12.00  
##  402    : 1   1st Qu.:2021-10-11   1st Qu.:2021-10-11 14:28:24.50  
##  403    : 1   Median :2021-10-12   Median :2021-10-12 15:10:09.00  
##  404    : 1   Mean   :2021-10-13   Mean   :2021-10-13 15:59:48.04  
##  405    : 1   3rd Qu.:2021-10-18   3rd Qu.:2021-10-18 16:47:25.50  
##  406    : 1   Max.   :2021-10-19   Max.   :2021-10-19 16:30:20.00  
##  (Other):33

Get filenames:

# make a list of file names of all data to load in
filenames_aquaflux <- list.files(path = "./Data/AquaFlux data", 
                                 pattern = ".csv")

Make a function that will read in the data from each csv, name and organize the data correctly.

Variables that will be read-in related to CEWL:

  • time sec = time recording from Evaporimeter
  • CEWL_g_m2h = cutaneous evaporative water loss
  • msmt_temp_C = ambient temperature in Celsius
  • msmt_RH_percent = % relative ambient humidity
read_CEWL_file <- function(filename) {
  
   # save helpful info
   date <- substr(filename, 1, 10)
   name <- substr(filename, 19, 21)
   
   ## load file
   dat <- read.csv(file.path("./Data/AquaFlux data", filename), 
                   header = TRUE, 
                   fileEncoding="UTF-8-BOM") %>%
      # select only the relevant values
      dplyr::select(time_sec = 'Time..sec.',
                    CEWL_g_m2h = 'Flux..g..m2h..', 
                    msmt_temp_C = 'AmbT..C.', 
                    msmt_RH_percent = 'AmbRH....'
                    ) %>%
      # use filename info     
      dplyr::mutate(date = date,
                    lizard_ID = name)
   # return the dataframe for that single csv file
   dat
   
}

Apply function to all files, compile, and incorporate timing:

# apply function to get data from all csvs
all_CEWL_data <- lapply(filenames_aquaflux, read_CEWL_file) %>%
   # paste all data files together into one df by row
   reduce(rbind) %>%
   # properly format data classes
   mutate(lizard_ID = as.factor(lizard_ID),
          date = as.Date(date, format = "%d-%m-%Y")
          ) %>%
   # add actual times
   left_join(AF_starts_tidy, by = c("date", "lizard_ID")) %>%
   # calculate current times for each point
   mutate(date_time = date_time_start + floor(time_sec))
summary(all_CEWL_data)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 0.03   Min.   :24.00   Min.   :21.10  
##  1st Qu.:225.0   1st Qu.: 7.26   1st Qu.:24.80   1st Qu.:32.40  
##  Median :450.2   Median :12.62   Median :25.30   Median :33.70  
##  Mean   :450.3   Mean   :13.42   Mean   :25.16   Mean   :35.09  
##  3rd Qu.:675.7   3rd Qu.:18.44   3rd Qu.:25.50   3rd Qu.:37.40  
##  Max.   :901.0   Max.   :37.40   Max.   :26.00   Max.   :48.20  
##                                                                 
##       date              lizard_ID     date_time_start                 
##  Min.   :2021-10-05   417    :  887   Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-11   409    :  886   1st Qu.:2021-10-11 14:11:19.00  
##  Median :2021-10-12   410    :  886   Median :2021-10-12 15:10:09.00  
##  Mean   :2021-10-13   411    :  886   Mean   :2021-10-13 15:59:35.94  
##  3rd Qu.:2021-10-18   412    :  886   3rd Qu.:2021-10-18 17:01:43.00  
##  Max.   :2021-10-19   413    :  886   Max.   :2021-10-19 16:30:20.00  
##                       (Other):29216                                   
##    date_time                     
##  Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-11 14:22:37.00  
##  Median :2021-10-12 15:17:35.00  
##  Mean   :2021-10-13 16:07:05.77  
##  3rd Qu.:2021-10-18 17:05:27.00  
##  Max.   :2021-10-19 16:45:20.00  
## 
unique(all_CEWL_data$date)
## [1] "2021-10-05" "2021-10-11" "2021-10-12" "2021-10-18" "2021-10-19"

Weather Data

get filenames:

filenames_weather <- list.files(path = "./Data/Weather Data", 
                                pattern = ".csv")

Weather data for each capture day includes the following variables:

  • date
  • time
  • temp_f = temperature in Fahrenheit
  • wind_speed_mph = wind speed in miles per hr
  • relative_humidity_percent = % relative humidity
  • solar_rad_w_m2 = Watts per square meter of solar radiation
  • precip_inches = inches of precipitation
read_weather_file <- function(filename) {
  
   # read file
   dat <- read.csv(file.path("./Data/Weather Data", filename), 
                   header = TRUE, # each csv has headers
                   check.names = F,
                   sep = ";" #separate data via semicolons
                   )
  # return the dataframe for that single csv file
  dat
  
}

Apply function to all dataframes and compile:

# apply function to get data from all csvs
all_weather_data <- lapply(filenames_weather, read_weather_file) %>%
  # paste all data files together into one df by row
  reduce(rbind) %>%
      # select only the relevant values
      dplyr::select(date = "Date",
                    time = "Time",
                    temp_F = "Temperature (\xb0F)", 
                    wind_speed_mph = "Wind Speed (ave) (mph)", 
                    relative_humidity_percent = "Relative Humidity (% RH)",
                    solar_rad_W_m2 = "Pyranometer 0-2000 W/m\xb2 (W/m\xb2)",
                    precip_inches = "Precipitation II (inch)"
                    ) %>%
      # properly format data classes
      dplyr::mutate(date_time = as.POSIXct(paste(date, time), 
                                           format = "%m/%d/%y %I:%M %p"),
                    date = as.Date(date, format = "%m/%d/%y"),
                    time = as.POSIXct(time, format = "%I:%M %p"),
                    temp_F = as.numeric(temp_F), 
                    temp_C =(temp_F-32)*(5/9),
                    relative_humidity_percent = as.numeric(relative_humidity_percent),
                    solar_rad_W_m2 = as.numeric(solar_rad_W_m2),
                    wind_speed_mph = as.numeric(wind_speed_mph),
                    precip_inches = as.numeric(precip_inches)
                    ) %>%
   dplyr::filter(complete.cases(temp_C, relative_humidity_percent))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
summary(all_weather_data)
##       date                 time                            temp_F     
##  Min.   :2021-10-05   Min.   :2023-01-12 00:00:00.00   Min.   :43.00  
##  1st Qu.:2021-10-12   1st Qu.:2023-01-12 05:45:00.00   1st Qu.:54.50  
##  Median :2021-10-16   Median :2023-01-12 11:45:00.00   Median :58.90  
##  Mean   :2021-10-16   Mean   :2023-01-12 11:51:51.85   Mean   :60.36  
##  3rd Qu.:2021-10-21   3rd Qu.:2023-01-12 17:45:00.00   3rd Qu.:65.90  
##  Max.   :2021-10-26   Max.   :2023-01-12 23:45:00.00   Max.   :88.90  
##  wind_speed_mph   relative_humidity_percent solar_rad_W_m2  precip_inches      
##  Min.   : 0.100   Min.   : 12.10            Min.   :  0.0   Min.   :0.0000000  
##  1st Qu.: 0.100   1st Qu.: 44.00            1st Qu.:  0.0   1st Qu.:0.0000000  
##  Median : 2.700   Median : 71.90            Median :  1.5   Median :0.0000000  
##  Mean   : 2.758   Mean   : 68.02            Mean   :190.8   Mean   :0.0009429  
##  3rd Qu.: 4.400   3rd Qu.: 96.10            3rd Qu.:387.0   3rd Qu.:0.0000000  
##  Max.   :12.100   Max.   :100.00            Max.   :879.7   Max.   :0.2170000  
##    date_time                          temp_C      
##  Min.   :2021-10-05 00:00:00.00   Min.   : 6.111  
##  1st Qu.:2021-10-12 08:52:30.00   1st Qu.:12.500  
##  Median :2021-10-16 18:00:00.00   Median :14.944  
##  Mean   :2021-10-16 11:57:00.16   Mean   :15.754  
##  3rd Qu.:2021-10-21 03:00:00.00   3rd Qu.:18.833  
##  Max.   :2021-10-26 00:00:00.00   Max.   :31.611

Get times we want to know weather data for:

# get earliest and latest capture times for a given day
collection_data %>%
  group_by(date) %>%
  summarise(min(time_capture), max(time_capture))
## # A tibble: 5 × 3
##   date       `min(time_capture)` `max(time_capture)`
##   <date>     <dttm>              <dttm>             
## 1 2021-10-05 2023-01-12 10:10:00 2023-01-12 11:40:00
## 2 2021-10-11 2023-01-12 10:10:00 2023-01-12 11:53:00
## 3 2021-10-12 2023-01-12 10:29:00 2023-01-12 16:00:00
## 4 2021-10-18 2023-01-12 10:18:00 2023-01-12 11:34:00
## 5 2021-10-19 2023-01-12 10:20:00 2023-01-12 11:58:00
# list times weather station has recordings, 1h before to 1h after captures for a given day
t1_days <- seq(as.POSIXct("2021-10-05 09:00"),
               as.POSIXct("2021-10-05 13:00"), "15 min")
t2_days <- seq(as.POSIXct("2021-10-11 09:00"),
               as.POSIXct("2021-10-11 13:00"), "15 min")
t3_days <- seq(as.POSIXct("2021-10-12 09:00"),
               as.POSIXct("2021-10-12 17:00"), "15 min")
t4_days <- seq(as.POSIXct("2021-10-18 09:00"),
               as.POSIXct("2021-10-18 13:00"), "15 min")
t5_days <- seq(as.POSIXct("2021-10-19 09:00"),
               as.POSIXct("2021-10-19 13:00"), "15 min")
exp_dates <- data.frame(date_time = c(t1_days, t2_days, t3_days, 
                                      t4_days, t5_days)) %>%
   dplyr::mutate(date = as.Date(substr(date_time, 1, 10)))
summary(exp_dates)
##    date_time                           date           
##  Min.   :2021-10-05 09:00:00.00   Min.   :2021-10-05  
##  1st Qu.:2021-10-11 11:00:00.00   1st Qu.:2021-10-11  
##  Median :2021-10-12 13:00:00.00   Median :2021-10-12  
##  Mean   :2021-10-13 07:51:05.34   Mean   :2021-10-12  
##  3rd Qu.:2021-10-18 11:00:00.00   3rd Qu.:2021-10-18  
##  Max.   :2021-10-19 13:00:00.00   Max.   :2021-10-19

subset weather data including ~1hr buffer before first and after last capture times for each measurement day:

weather_data_subset <- all_weather_data %>%
   dplyr::filter(date_time %in% exp_dates$date_time)
summary(weather_data_subset)
##       date                 time                            temp_F     
##  Min.   :2021-10-05   Min.   :2023-01-12 09:00:00.00   Min.   :54.40  
##  1st Qu.:2021-10-11   1st Qu.:2023-01-12 10:15:00.00   1st Qu.:61.40  
##  Median :2021-10-12   Median :2023-01-12 11:30:00.00   Median :64.70  
##  Mean   :2021-10-13   Mean   :2023-01-12 11:42:48.65   Mean   :64.63  
##  3rd Qu.:2021-10-18   3rd Qu.:2023-01-12 12:45:00.00   3rd Qu.:66.80  
##  Max.   :2021-10-19   Max.   :2023-01-12 17:00:00.00   Max.   :81.00  
##  wind_speed_mph   relative_humidity_percent solar_rad_W_m2  precip_inches
##  Min.   : 0.100   Min.   :19.50             Min.   :289.2   Min.   :0    
##  1st Qu.: 3.900   1st Qu.:30.20             1st Qu.:508.1   1st Qu.:0    
##  Median : 5.000   Median :45.10             Median :669.2   Median :0    
##  Mean   : 5.229   Mean   :42.89             Mean   :632.6   Mean   :0    
##  3rd Qu.: 6.700   3rd Qu.:51.80             3rd Qu.:766.0   3rd Qu.:0    
##  Max.   :10.100   Max.   :74.10             Max.   :833.0   Max.   :0    
##    date_time                          temp_C     
##  Min.   :2021-10-05 09:00:00.00   Min.   :12.44  
##  1st Qu.:2021-10-11 12:30:00.00   1st Qu.:16.33  
##  Median :2021-10-12 14:00:00.00   Median :18.17  
##  Mean   :2021-10-14 01:12:19.46   Mean   :18.13  
##  3rd Qu.:2021-10-18 11:30:00.00   3rd Qu.:19.33  
##  Max.   :2021-10-19 13:00:00.00   Max.   :27.22

Weather Models

Temperature, relative humidity, and wind speed were significantly different among capture days

Temperature

ANOVA:

weather_temp_aov <- aov(data = weather_data_subset, 
                        temp_C ~ as.factor(date))
summary(weather_temp_aov)
##                  Df Sum Sq Mean Sq F value Pr(>F)    
## as.factor(date)   4  930.8  232.70   82.32 <2e-16 ***
## Residuals       180  508.8    2.83                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(weather_temp_aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = temp_C ~ as.factor(date), data = weather_data_subset)
## 
## $`as.factor(date)`
##                             diff         lwr          upr     p adj
## 2021-10-11-2021-10-05 -5.7320261  -7.1082606 -4.355791711 0.0000000
## 2021-10-12-2021-10-05 -6.1604278  -7.4205535 -4.900302109 0.0000000
## 2021-10-18-2021-10-05 -8.8529412 -10.2291756 -7.476706744 0.0000000
## 2021-10-19-2021-10-05 -7.1470588  -8.5232933 -5.770824391 0.0000000
## 2021-10-12-2021-10-11 -0.4284017  -1.4064489  0.549645560 0.7473358
## 2021-10-18-2021-10-11 -3.1209150  -4.2446057 -1.997224324 0.0000000
## 2021-10-19-2021-10-11 -1.4150327  -2.5387234 -0.291341971 0.0058066
## 2021-10-18-2021-10-12 -2.6925134  -3.6705606 -1.714466146 0.0000000
## 2021-10-19-2021-10-12 -0.9866310  -1.9646782 -0.008583793 0.0468755
## 2021-10-19-2021-10-18  1.7058824   0.5821916  2.829573062 0.0004273

Figure:

ggplot(data = weather_data_subset) + 
   aes(x = as.factor(date), 
       y = temp_C, 
       color = as.factor(date)) +
   geom_boxplot(size = 1) +
   geom_jitter(size = 0.6,
               alpha = 0.6) +
   theme_classic() + 
   xlab("Date") + 
   ylab("Average Temperature During Capture (C)") + 
   #annotate("text", x = 3, y = 22, label = "example", size = 6) +
   scale_color_brewer(palette = "Set2") +
   theme(text = element_text(color = "black", 
                            family = "sans", 
                            size = 18),
         axis.text = element_text(color = "black", 
                                 family = "sans", 
                                 size = 14),
         legend.text.align = 0,
         legend.position = "none") -> weather_temp_fig
weather_temp_fig

Relative Humidity

NOTE: will need to convert to VPD if we intend to add this to the LMM, but it’s more likely we will just incorporate via capture_date random effect.

weather_humid_aov <- aov(data = weather_data_subset, 
                        relative_humidity_percent ~ as.factor(date))
summary(weather_humid_aov)
##                  Df Sum Sq Mean Sq F value Pr(>F)    
## as.factor(date)   4  22366    5591   71.91 <2e-16 ***
## Residuals       180  13996      78                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(weather_humid_aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = relative_humidity_percent ~ as.factor(date), data = weather_data_subset)
## 
## $`as.factor(date)`
##                             diff        lwr        upr     p adj
## 2021-10-11-2021-10-05 -14.541176 -21.759105  -7.323248 0.0000010
## 2021-10-12-2021-10-05 -24.974510 -31.583484 -18.365536 0.0000000
## 2021-10-18-2021-10-05   2.582353  -4.635576   9.800282 0.8614006
## 2021-10-19-2021-10-05  -5.670588 -12.888517   1.547340 0.1979082
## 2021-10-12-2021-10-11 -10.433333 -15.562892  -5.303775 0.0000008
## 2021-10-18-2021-10-11  17.123529  11.230115  23.016943 0.0000000
## 2021-10-19-2021-10-11   8.870588   2.977174  14.764002 0.0004917
## 2021-10-18-2021-10-12  27.556863  22.427304  32.686421 0.0000000
## 2021-10-19-2021-10-12  19.303922  14.174363  24.433480 0.0000000
## 2021-10-19-2021-10-18  -8.252941 -14.146355  -2.359527 0.0014758
ggplot(data = weather_data_subset) + 
   aes(x = as.factor(date), 
       y = relative_humidity_percent, 
       color = as.factor(date)) +
   geom_boxplot(size = 1) +
   geom_jitter(size = 0.6,
               alpha = 0.6) +
   theme_classic() + 
   xlab("Date") + 
   ylab("Average Relative Humidity During Capture (%)") + 
   #annotate("text", x = 3, y = 22, label = "example", size = 6) +
   scale_color_brewer(palette = "Set2") +
   theme(text = element_text(color = "black", 
                            family = "sans", 
                            size = 18),
         axis.text = element_text(color = "black", 
                                 family = "sans", 
                                 size = 14),
         legend.text.align = 0,
         legend.position = "none") -> weather_humid_fig
weather_humid_fig

Solar Radiation

weather_sorad_aov <- aov(data = weather_data_subset, 
                        solar_rad_W_m2 ~ as.factor(date))
summary(weather_sorad_aov)
##                  Df  Sum Sq Mean Sq F value Pr(>F)
## as.factor(date)   4   15183    3796   0.158  0.959
## Residuals       180 4314164   23968
TukeyHSD(weather_sorad_aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = solar_rad_W_m2 ~ as.factor(date), data = weather_data_subset)
## 
## $`as.factor(date)`
##                             diff        lwr       upr     p adj
## 2021-10-11-2021-10-05  18.864706 -107.85777 145.58719 0.9940100
## 2021-10-12-2021-10-05  27.290553  -88.74074 143.32184 0.9668058
## 2021-10-18-2021-10-05   7.264706 -119.45777 133.98719 0.9998590
## 2021-10-19-2021-10-05  16.611765 -110.11072 143.33424 0.9963337
## 2021-10-12-2021-10-11   8.425847  -81.63190  98.48359 0.9990194
## 2021-10-18-2021-10-11 -11.600000 -115.06847  91.86847 0.9980058
## 2021-10-19-2021-10-11  -2.252941 -105.72141 101.21553 0.9999970
## 2021-10-18-2021-10-12 -20.025847 -110.08359  70.03190 0.9729271
## 2021-10-19-2021-10-12 -10.678788 -100.73653  79.37896 0.9975178
## 2021-10-19-2021-10-18   9.347059  -94.12141 112.81553 0.9991459
ggplot(data = weather_data_subset) + 
   aes(x = as.factor(date), 
       y = solar_rad_W_m2 , 
       color = as.factor(date)) +
   geom_boxplot(size = 1) +
   geom_jitter(size = 0.6,
               alpha = 0.6) +
   theme_classic() + 
   xlab("Date") + 
   ylab("Average Solar Radiation During Capture (W/m2)") + 
   #annotate("text", x = 3, y = 22, label = "example", size = 6) +
   scale_color_brewer(palette = "Set2") +
   theme(text = element_text(color = "black", 
                            family = "sans", 
                            size = 18),
         axis.text = element_text(color = "black", 
                                 family = "sans", 
                                 size = 14),
         legend.text.align = 0,
         legend.position = "none") -> weather_solar_fig
weather_solar_fig

Wind Speed

weather_wind_aov <- aov(data = weather_data_subset, 
                        wind_speed_mph ~ as.factor(date))
summary(weather_wind_aov)
##                  Df Sum Sq Mean Sq F value Pr(>F)    
## as.factor(date)   4  401.1  100.28   60.84 <2e-16 ***
## Residuals       180  296.7    1.65                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(weather_wind_aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = wind_speed_mph ~ as.factor(date), data = weather_data_subset)
## 
## $`as.factor(date)`
##                             diff          lwr        upr     p adj
## 2021-10-11-2021-10-05  4.4529412  3.402010646  5.5038717 0.0000000
## 2021-10-12-2021-10-05  2.7891266  1.826859858  3.7513933 0.0000000
## 2021-10-18-2021-10-05  4.2470588  3.196128294  5.2979894 0.0000000
## 2021-10-19-2021-10-05  1.0529412  0.002010646  2.1038717 0.0493042
## 2021-10-12-2021-10-11 -1.6638146 -2.410678423 -0.9169508 0.0000001
## 2021-10-18-2021-10-11 -0.2058824 -1.063963537  0.6521988 0.9643335
## 2021-10-19-2021-10-11 -3.4000000 -4.258081185 -2.5419188 0.0000000
## 2021-10-18-2021-10-12  1.4579323  0.711068458  2.2047961 0.0000023
## 2021-10-19-2021-10-12 -1.7361854 -2.483049189 -0.9893216 0.0000000
## 2021-10-19-2021-10-18 -3.1941176 -4.052198832 -2.3360365 0.0000000
ggplot(data = weather_data_subset) + 
   aes(x = as.factor(date), 
       y = wind_speed_mph , 
       color = as.factor(date)) +
   geom_boxplot(size = 1) +
   geom_jitter(size = 0.6,
               alpha = 0.6) +
   theme_classic() + 
   xlab("Date") + 
   ylab("Average Wind Speed During Capture (mph)") + 
   #annotate("text", x = 3, y = 22, label = "example", size = 6) +
   scale_color_brewer(palette = "Set2") +
   theme(text = element_text(color = "black", 
                            family = "sans", 
                            size = 18),
         axis.text = element_text(color = "black", 
                                 family = "sans", 
                                 size = 14),
         legend.text.align = 0,
         legend.position = "none") -> weather_wind_fig
weather_wind_fig

Thermocouple Data

filenames:

# make a list of file names of all data to load in
filenames_thermocouple <- list.files(path = "./Data/Thermocouple data",
                                     pattern = ".csv")

Thermocouple variables that we will load in with each file:

  • date
  • time
  • lizard ID
  • value_cloacal = cloacal temperature in Celcius
  • thermocouple_cloacal = label of cloacal thermocouple
  • value_dorsal = dorsal temperature in Celcius
  • thermocouple_dorsal = label of dorsal thermocouple
read_thermocouple_files <- function(filename) {
   
   # load file
   dat <- read.csv(file.path("./Data/Thermocouple data",
                             filename), 
                   header = TRUE # each csv has headers
                   ) %>%
      # select only the relevant values
      dplyr::select(date = 'Date',
                    time = 'Time',
                    lizard_ID = 'Lizard',
                    value_cloacal = 'Value_Cloacal',
                    thermocouple_cloacal = 'Thermocouple_Cloacal',
                    value_dorsal = 'Value_Dorsal',
                    thermocouple_dorsal = 'Thermocouple_Dorsal'
                    ) 
     
  # return the dataframe for that single csv file
  dat
}

apply function and format variables:

# apply function to get data from all csvs
all_thermocouple_data <- lapply(filenames_thermocouple,
                                read_thermocouple_files) %>%
   # paste all data files together into one df by row
   reduce(rbind) %>%
   # properly format data classes
   mutate(date_time = as.POSIXct(paste(date, time), 
                                 format = "%m/%d/%Y %H:%M:%S"),
          date = as.Date(date, format = "%m/%d/%Y"),
          time = as.POSIXct(time, format = "%H:%M:%S"),
          lizard_ID = as.factor(lizard_ID),
          thermocouple_cloacal = as.factor(thermocouple_cloacal),
          thermocouple_dorsal = as.factor(thermocouple_dorsal),
          value_cloacal = as.numeric(value_cloacal),
          value_dorsal = as.numeric(value_dorsal)
          ) %>%
   dplyr::filter(complete.cases(value_cloacal, value_dorsal)) %>%
   # need to get rid of non-measurements when TC not plugged in, temp = 9999
   dplyr::filter(value_dorsal < 50) %>%
   dplyr::filter(value_cloacal < 50)

summary(all_thermocouple_data)
##       date                 time                          lizard_ID    
##  Min.   :2021-10-05   Min.   :2023-01-12 13:17:44.00   406    : 1886  
##  1st Qu.:2021-10-11   1st Qu.:2023-01-12 14:31:25.25   407    : 1009  
##  Median :2021-10-12   Median :2023-01-12 15:22:32.00   404    : 1003  
##  Mean   :2021-10-12   Mean   :2023-01-12 15:26:16.27   436    :  976  
##  3rd Qu.:2021-10-18   3rd Qu.:2023-01-12 16:20:06.75   430    :  969  
##  Max.   :2021-10-19   Max.   :2023-01-12 17:48:51.00   425    :  964  
##                                                        (Other):28847  
##  value_cloacal   thermocouple_cloacal  value_dorsal   thermocouple_dorsal
##  Min.   :11.80   E      :9326         Min.   :12.50   A      :7651       
##  1st Qu.:20.60   D      :5778         1st Qu.:22.20   F      :6477       
##  Median :25.20   B      :5696         Median :25.30   D      :5667       
##  Mean   :24.51   F      :5595         Mean   :24.95   G      :3756       
##  3rd Qu.:27.40   C      :3633         3rd Qu.:27.30   E      :3740       
##  Max.   :38.00   A      :2820         Max.   :36.30   H      :3633       
##                  (Other):2806                         (Other):4730       
##    date_time                     
##  Min.   :2021-10-05 13:17:44.00  
##  1st Qu.:2021-10-11 14:12:38.25  
##  Median :2021-10-12 15:19:27.50  
##  Mean   :2021-10-13 15:21:52.15  
##  3rd Qu.:2021-10-18 17:01:30.75  
##  Max.   :2021-10-19 16:45:25.00  
## 

TC Calibration Data

The thermocouples tend to drift 1-2 C off of the true temperature, so we collected reference data to make a calibration curve for each one.

Get all the calibration data:

TCD1 <- read.csv("./Data/TC Calibration/thermocouple calibration data 1.csv")
TCD2 <- read.csv("./Data/TC Calibration/thermocouple calibration data 2.csv")
TCD3 <- read.csv("./Data/TC Calibration/thermocouple calibration data 3.csv")
TCR1 <- read.csv("./Data/TC Calibration/thermocouple_calibration_1_ref.csv") %>%
   mutate(date = "9/28/21")
TCR2 <- read.csv("./Data/TC Calibration/thermocouple_calibration_2_ref.csv") %>%
   mutate(date = "10/1/21")
TCR3 <- read.csv("./Data/TC Calibration/thermocouple_calibration_3_ref.csv") %>%
   mutate(date = "1/8/22")
TC_key <- read.csv("./Data/TC Calibration/thermocouple_calibration_key.csv")

Reorganize repeated columns into vertical form:

# thermocouple calibration data 1
TCD1.1 <- TCD1 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value, 
                                 port = Unit)
TCD1.2 <- TCD1 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value.1, 
                                 port = Unit.1)
TCD1.3 <- TCD1 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value.2, 
                                 port = Unit.2)
# thermocouple calibration data 2
TCD2.1 <- TCD2 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value, 
                                 port = Unit)
TCD2.2 <- TCD2 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value.1, 
                                 port = Unit.1)
TCD2.3 <- TCD2 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value.2, 
                                 port = Unit.2)
TCD2.4 <- TCD2 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value.3, 
                                 port = Unit.3)
# thermocouple calibration data 3
TCD3.1 <- TCD3 %>% dplyr::select(date = Date, 
                                 time = Time, 
                                 temp_C = Value, 
                                 port = Unit)
# format and combine
TC_ref <- TCR1 %>%
   rbind(TCR2) %>%
   rbind(TCR3) %>%
   mutate(date = as.Date(date, format = "%m/%d/%y")) %>%
   dplyr::filter(complete.cases(reference_temp))
TC_key_formatted <- TC_key %>%
   mutate(date = as.Date(date, format = "%m/%d/%y"),
          lead = as.factor(lead),
          port = as.factor(port))
calibration_data <- TCD1.1 %>%
   rbind(TCD1.2) %>%
   rbind(TCD1.3) %>%
   rbind(TCD2.1) %>%
   rbind(TCD2.2) %>%
   rbind(TCD2.3) %>%
   rbind(TCD2.4) %>%
   rbind(TCD3.1) %>%
   mutate(date = as.Date(date, format = "%m/%d/%Y"),
          time = substr(time, 1, 5),
          port = as.factor(substr(port, 1, 2))
          ) %>%
   left_join(TC_key_formatted, by = c("date", "port")) %>%
   left_join(TC_ref, by = c("date", "time")) %>%
   dplyr::filter(complete.cases(reference_temp))
summary(calibration_data)
##       date                time               temp_C      port          lead    
##  Min.   :2021-09-28   Length:1004        Min.   : 7.30   T1:366   B      :151  
##  1st Qu.:2021-09-28   Class :character   1st Qu.:16.30   T2:263   E      :151  
##  Median :2021-10-01   Mode  :character   Median :25.00   T3:263   H      :151  
##  Mean   :2021-10-09                      Mean   :25.38   T4:112   A      :112  
##  3rd Qu.:2021-10-01                      3rd Qu.:33.40            C      :112  
##  Max.   :2022-01-08                      Max.   :41.20            D      :112  
##                                                                   (Other):215  
##  reference_temp 
##  Min.   : 1.50  
##  1st Qu.:15.00  
##  Median :23.50  
##  Mean   :23.95  
##  3rd Qu.:32.00  
##  Max.   :40.00  
## 

Data Wrangling

CEWL Errors

Remove sections of points where the probe got disconnected:

lizard note 403 error @520, moved @650 405 error @320 &660 406 error @250 & 850 408 broke seal ~170 411 large move @400 414 large move @660 426 Big move 436 move @~700

Lizard 403

This lizard’s data will NOT be included in analyses due to suspected Aquaflux failure to re-equibrilate.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 403) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 520) + # first error note
   geom_vline(xintercept = 650) + # second error note
   xlim(0, 900) + ylim(5,15)
## Warning: Removed 77 rows containing missing values (`geom_point()`).

l403_remove <- c(500:560, 600:660)
l403_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 403) %>%
   dplyr::filter(floor(time_sec) %nin% l403_remove)
ggplot(data = l403_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 520) + # first error note
   geom_vline(xintercept = 650) + # second error note
   xlim(60, 900) #+ ylim(5, 12)
## Warning: Removed 60 rows containing missing values (`geom_point()`).

summary(l403_filtered)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 4.88   Min.   :25.20   Min.   :47.50  
##  1st Qu.:194.6   1st Qu.: 8.22   1st Qu.:25.30   1st Qu.:47.90  
##  Median :389.1   Median :10.02   Median :25.30   Median :48.00  
##  Mean   :429.9   Mean   :11.05   Mean   :25.32   Mean   :47.95  
##  3rd Qu.:706.0   3rd Qu.:12.48   3rd Qu.:25.40   3rd Qu.:48.10  
##  Max.   :900.6   Max.   :26.26   Max.   :25.60   Max.   :48.20  
##                                                                 
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-05   403    :765   Min.   :2021-10-05 16:30:31  
##  1st Qu.:2021-10-05   401    :  0   1st Qu.:2021-10-05 16:30:31  
##  Median :2021-10-05   402    :  0   Median :2021-10-05 16:30:31  
##  Mean   :2021-10-05   404    :  0   Mean   :2021-10-05 16:30:31  
##  3rd Qu.:2021-10-05   405    :  0   3rd Qu.:2021-10-05 16:30:31  
##  Max.   :2021-10-05   406    :  0   Max.   :2021-10-05 16:30:31  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-05 16:30:31.00  
##  1st Qu.:2021-10-05 16:33:45.00  
##  Median :2021-10-05 16:37:00.00  
##  Mean   :2021-10-05 16:37:40.45  
##  3rd Qu.:2021-10-05 16:42:17.00  
##  Max.   :2021-10-05 16:45:31.00  
## 

Lizard 405

This lizard’s data will NOT be included in analyses due to suspected Aquaflux failure to re-equibrilate.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 405) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 320) + # error 1
   geom_vline(xintercept = 660) + # error 2
   xlim(0,900) + ylim(0,15)
## Warning: Removed 63 rows containing missing values (`geom_point()`).

l405_remove <- c(297:357, 635:695)
l405_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 405) %>%
   dplyr::filter(floor(time_sec) %nin% l405_remove)
ggplot(data = l405_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 320) + 
   geom_vline(xintercept = 660) + 
   xlim(60, 900) + ylim(0, 12)
## Warning: Removed 107 rows containing missing values (`geom_point()`).

summary(l405_filtered)
##     time_sec       CEWL_g_m2h      msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 2.630   Min.   :25.40   Min.   :46.10  
##  1st Qu.:194.6   1st Qu.: 4.160   1st Qu.:25.40   1st Qu.:46.40  
##  Median :450.2   Median : 6.330   Median :25.50   Median :46.60  
##  Mean   :442.9   Mean   : 7.324   Mean   :25.47   Mean   :46.54  
##  3rd Qu.:705.8   3rd Qu.: 8.200   3rd Qu.:25.50   3rd Qu.:46.70  
##  Max.   :900.3   Max.   :21.190   Max.   :25.50   Max.   :46.80  
##                                                                  
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-05   405    :765   Min.   :2021-10-05 15:25:40  
##  1st Qu.:2021-10-05   401    :  0   1st Qu.:2021-10-05 15:25:40  
##  Median :2021-10-05   402    :  0   Median :2021-10-05 15:25:40  
##  Mean   :2021-10-05   403    :  0   Mean   :2021-10-05 15:25:40  
##  3rd Qu.:2021-10-05   404    :  0   3rd Qu.:2021-10-05 15:25:40  
##  Max.   :2021-10-05   406    :  0   Max.   :2021-10-05 15:25:40  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-05 15:25:40.00  
##  1st Qu.:2021-10-05 15:28:54.00  
##  Median :2021-10-05 15:33:10.00  
##  Mean   :2021-10-05 15:33:02.46  
##  3rd Qu.:2021-10-05 15:37:25.00  
##  Max.   :2021-10-05 15:40:40.00  
## 

Lizard 406

Will remove from 238 to 287 seconds, 60 seconds total, for beginning error and last 81 seconds (820 to 901 seconds) due to failure to re-equibrilate after second error.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 406) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 238) + # error 1
   geom_vline(xintercept = 287) + # error 2
   geom_hline(yintercept = 7.5) +
   xlim(200, 900) + ylim(0, 12)
## Warning: Removed 198 rows containing missing values (`geom_point()`).

l406_remove <- c(238:287, 820:901)
l406_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 406) %>%
   dplyr::filter(floor(time_sec) %nin% l406_remove)
ggplot(data = l406_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() +
   geom_vline(xintercept = 250) + # error 1
   geom_vline(xintercept = 850) + # error 2
   xlim(60, 900) + ylim(0, 12)
## Warning: Removed 123 rows containing missing values (`geom_point()`).

summary(l406_filtered)
##     time_sec       CEWL_g_m2h      msmt_temp_C   msmt_RH_percent
##  Min.   :  0.0   Min.   : 1.380   Min.   :25.3   Min.   :46.50  
##  1st Qu.:192.2   1st Qu.: 4.628   1st Qu.:25.4   1st Qu.:46.60  
##  Median :434.8   Median : 5.865   Median :25.4   Median :46.60  
##  Mean   :419.2   Mean   : 7.433   Mean   :25.4   Mean   :46.61  
##  3rd Qu.:627.1   3rd Qu.: 8.020   3rd Qu.:25.4   3rd Qu.:46.70  
##  Max.   :819.2   Max.   :21.000   Max.   :25.5   Max.   :46.70  
##                                                                 
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-05   406    :756   Min.   :2021-10-05 14:58:21  
##  1st Qu.:2021-10-05   401    :  0   1st Qu.:2021-10-05 14:58:21  
##  Median :2021-10-05   402    :  0   Median :2021-10-05 14:58:21  
##  Mean   :2021-10-05   403    :  0   Mean   :2021-10-05 14:58:21  
##  3rd Qu.:2021-10-05   404    :  0   3rd Qu.:2021-10-05 14:58:21  
##  Max.   :2021-10-05   405    :  0   Max.   :2021-10-05 14:58:21  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-05 14:58:21.00  
##  1st Qu.:2021-10-05 15:01:32.75  
##  Median :2021-10-05 15:05:35.50  
##  Mean   :2021-10-05 15:05:19.78  
##  3rd Qu.:2021-10-05 15:08:47.25  
##  Max.   :2021-10-05 15:12:00.00  
## 

Lizard 408

Will remove one section from 144 to 222 seconds, 80 seconds total.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 408) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 144) + 
   geom_vline(xintercept = 222) + # error @170 when seal broke
   geom_hline(yintercept = 26) +
   xlim(100,300) + ylim(0,40)
## Warning: Removed 689 rows containing missing values (`geom_point()`).

l408_remove <- c(144:222) 
l408_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 408) %>%
   dplyr::filter(floor(time_sec) %nin% l408_remove)
ggplot(data = l408_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 144) + # error @170 when seal broke
   geom_vline(xintercept = 222) + 
   xlim(60, 900) + ylim(0, 35)
## Warning: Removed 60 rows containing missing values (`geom_point()`).

summary(l408_filtered)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 1.11   Min.   :25.60   Min.   :46.70  
##  1st Qu.:284.1   1st Qu.:19.98   1st Qu.:25.70   1st Qu.:47.00  
##  Median :489.4   Median :20.97   Median :25.70   Median :47.10  
##  Mean   :475.7   Mean   :21.46   Mean   :25.72   Mean   :47.14  
##  3rd Qu.:695.0   3rd Qu.:23.27   3rd Qu.:25.70   3rd Qu.:47.30  
##  Max.   :900.5   Max.   :26.05   Max.   :26.00   Max.   :47.50  
##                                                                 
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-05   408    :808   Min.   :2021-10-05 13:21:12  
##  1st Qu.:2021-10-05   401    :  0   1st Qu.:2021-10-05 13:21:12  
##  Median :2021-10-05   402    :  0   Median :2021-10-05 13:21:12  
##  Mean   :2021-10-05   403    :  0   Mean   :2021-10-05 13:21:12  
##  3rd Qu.:2021-10-05   404    :  0   3rd Qu.:2021-10-05 13:21:12  
##  Max.   :2021-10-05   405    :  0   Max.   :2021-10-05 13:21:12  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-05 13:25:55.75  
##  Median :2021-10-05 13:29:21.00  
##  Mean   :2021-10-05 13:29:07.26  
##  3rd Qu.:2021-10-05 13:32:46.25  
##  Max.   :2021-10-05 13:36:12.00  
## 

Lizard 411

Will remove one section from 301 to 415 seconds, 114 seconds total.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 411) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 301) + 
   geom_vline(xintercept = 415) + # large move at 400s
   geom_hline(yintercept = 7) +
   xlim(250,450) + ylim(0,10)
## Warning: Removed 689 rows containing missing values (`geom_point()`).

l411_remove <- c(301:415)
l411_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 411) %>%
   dplyr::filter(floor(time_sec) %nin% l411_remove)
ggplot(data = l411_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 297) + 
   geom_vline(xintercept = 660) + 
   xlim(60, 900) + ylim(0, 12)
## Warning: Removed 96 rows containing missing values (`geom_point()`).

summary(l411_filtered)
##     time_sec       CEWL_g_m2h      msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 4.230   Min.   :25.30   Min.   :32.50  
##  1st Qu.:196.4   1st Qu.: 4.890   1st Qu.:25.50   1st Qu.:32.90  
##  Median :507.7   Median : 6.010   Median :25.50   Median :33.30  
##  Mean   :463.8   Mean   : 7.878   Mean   :25.49   Mean   :33.22  
##  3rd Qu.:704.2   3rd Qu.: 7.610   3rd Qu.:25.50   3rd Qu.:33.50  
##  Max.   :900.7   Max.   :32.320   Max.   :25.60   Max.   :33.70  
##                                                                  
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-11   411    :773   Min.   :2021-10-11 14:11:19  
##  1st Qu.:2021-10-11   401    :  0   1st Qu.:2021-10-11 14:11:19  
##  Median :2021-10-11   402    :  0   Median :2021-10-11 14:11:19  
##  Mean   :2021-10-11   403    :  0   Mean   :2021-10-11 14:11:19  
##  3rd Qu.:2021-10-11   404    :  0   3rd Qu.:2021-10-11 14:11:19  
##  Max.   :2021-10-11   405    :  0   Max.   :2021-10-11 14:11:19  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-11 14:11:19.00  
##  1st Qu.:2021-10-11 14:14:35.00  
##  Median :2021-10-11 14:19:46.00  
##  Mean   :2021-10-11 14:19:02.33  
##  3rd Qu.:2021-10-11 14:23:03.00  
##  Max.   :2021-10-11 14:26:19.00  
## 

Lizard 414

Will remove last 246 seconds (655 to 901 seconds) due to failure to re-equibrilate after error.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 414) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 655) + # large move @660s
   geom_vline(xintercept = 720) + 
   xlim(0,900) + ylim(0,15)
## Warning: Removed 1 rows containing missing values (`geom_point()`).

l414_remove <- c(655:901)
l414_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 414) %>%
   dplyr::filter(floor(time_sec) %nin% l414_remove)
ggplot(data = l414_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 660) + # large move @660s
   xlim(60, 900) + ylim(0, 12)
## Warning: Removed 59 rows containing missing values (`geom_point()`).

summary(l414_filtered)
##     time_sec       CEWL_g_m2h      msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 1.510   Min.   :25.60   Min.   :33.50  
##  1st Qu.:163.6   1st Qu.: 4.330   1st Qu.:25.70   1st Qu.:33.60  
##  Median :327.3   Median : 4.735   Median :25.70   Median :33.70  
##  Mean   :327.3   Mean   : 5.530   Mean   :25.73   Mean   :33.68  
##  3rd Qu.:490.9   3rd Qu.: 5.350   3rd Qu.:25.80   3rd Qu.:33.70  
##  Max.   :654.5   Max.   :13.740   Max.   :25.90   Max.   :33.80  
##                                                                  
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-11   414    :644   Min.   :2021-10-11 16:57:39  
##  1st Qu.:2021-10-11   401    :  0   1st Qu.:2021-10-11 16:57:39  
##  Median :2021-10-11   402    :  0   Median :2021-10-11 16:57:39  
##  Mean   :2021-10-11   403    :  0   Mean   :2021-10-11 16:57:39  
##  3rd Qu.:2021-10-11   404    :  0   3rd Qu.:2021-10-11 16:57:39  
##  Max.   :2021-10-11   405    :  0   Max.   :2021-10-11 16:57:39  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-11 16:57:39.00  
##  1st Qu.:2021-10-11 17:00:21.75  
##  Median :2021-10-11 17:03:05.50  
##  Mean   :2021-10-11 17:03:05.83  
##  3rd Qu.:2021-10-11 17:05:49.25  
##  Max.   :2021-10-11 17:08:33.00  
## 

Lizard 426

Will remove 404 seconds (497 to 901 seconds) due to failure to re-equibrilate after error.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 426) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 497) + 
   geom_vline(xintercept = 630) + 
   xlim(400, 900) + ylim(0,15)
## Warning: Removed 394 rows containing missing values (`geom_point()`).

l426_remove <- c(497:901)
l426_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 426) %>%
   dplyr::filter(floor(time_sec) %nin% l426_remove)
ggplot(data = l426_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 297) + 
   geom_vline(xintercept = 660) + 
   xlim(60, 900) + ylim(0, 12)
## Warning: Removed 59 rows containing missing values (`geom_point()`).

summary(l426_filtered)
##     time_sec       CEWL_g_m2h      msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 1.060   Min.   :25.10   Min.   :33.60  
##  1st Qu.:124.2   1st Qu.: 2.960   1st Qu.:25.20   1st Qu.:33.70  
##  Median :248.4   Median : 3.640   Median :25.20   Median :33.70  
##  Mean   :248.3   Mean   : 5.276   Mean   :25.18   Mean   :33.74  
##  3rd Qu.:372.4   3rd Qu.: 6.327   3rd Qu.:25.20   3rd Qu.:33.80  
##  Max.   :496.5   Max.   :17.200   Max.   :25.30   Max.   :33.90  
##                                                                  
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-18   426    :488   Min.   :2021-10-18 15:08:34  
##  1st Qu.:2021-10-18   401    :  0   1st Qu.:2021-10-18 15:08:34  
##  Median :2021-10-18   402    :  0   Median :2021-10-18 15:08:34  
##  Mean   :2021-10-18   403    :  0   Mean   :2021-10-18 15:08:34  
##  3rd Qu.:2021-10-18   404    :  0   3rd Qu.:2021-10-18 15:08:34  
##  Max.   :2021-10-18   405    :  0   Max.   :2021-10-18 15:08:34  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-18 15:08:34.00  
##  1st Qu.:2021-10-18 15:10:37.75  
##  Median :2021-10-18 15:12:41.50  
##  Mean   :2021-10-18 15:12:41.86  
##  3rd Qu.:2021-10-18 15:14:46.25  
##  Max.   :2021-10-18 15:16:50.00  
## 

Lizard 436

Will remove 201 seconds (700 to 901 seconds) due to failure to re-equibrilate after error.

all_CEWL_data %>%
   dplyr::filter(lizard_ID == 436) %>%
       ggplot(aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 700) + # moved here
   geom_vline(xintercept = 820) + 
   xlim(0, 900) + ylim(15, 30)
## Warning: Removed 12 rows containing missing values (`geom_point()`).

l436_remove <- c(700:901)
l436_filtered <- all_CEWL_data %>%
   dplyr::filter(lizard_ID == 436) %>%
   dplyr::filter(floor(time_sec) %nin% l436_remove)
ggplot(data = l436_filtered,
       aes(x = time_sec,
           y = CEWL_g_m2h)) + 
   geom_point() + 
   geom_vline(xintercept = 297) + 
   geom_vline(xintercept = 660) + 
   xlim(0, 900) + ylim(0, 30)

summary(l436_filtered)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 0.56   Min.   :24.00   Min.   :35.40  
##  1st Qu.:174.9   1st Qu.:21.53   1st Qu.:24.40   1st Qu.:35.60  
##  Median :349.9   Median :22.29   Median :24.50   Median :35.70  
##  Mean   :349.8   Mean   :21.87   Mean   :24.45   Mean   :35.77  
##  3rd Qu.:524.9   3rd Qu.:22.82   3rd Qu.:24.50   3rd Qu.:35.90  
##  Max.   :699.5   Max.   :23.81   Max.   :24.60   Max.   :36.80  
##                                                                 
##       date              lizard_ID   date_time_start              
##  Min.   :2021-10-19   436    :688   Min.   :2021-10-19 15:21:03  
##  1st Qu.:2021-10-19   401    :  0   1st Qu.:2021-10-19 15:21:03  
##  Median :2021-10-19   402    :  0   Median :2021-10-19 15:21:03  
##  Mean   :2021-10-19   403    :  0   Mean   :2021-10-19 15:21:03  
##  3rd Qu.:2021-10-19   404    :  0   3rd Qu.:2021-10-19 15:21:03  
##  Max.   :2021-10-19   405    :  0   Max.   :2021-10-19 15:21:03  
##                       (Other):  0                                
##    date_time                     
##  Min.   :2021-10-19 15:21:03.00  
##  1st Qu.:2021-10-19 15:23:57.75  
##  Median :2021-10-19 15:26:52.50  
##  Mean   :2021-10-19 15:26:52.41  
##  3rd Qu.:2021-10-19 15:29:47.25  
##  Max.   :2021-10-19 15:32:42.00  
## 

error summary Two lizards (403 and 405) will be completely removed due to suspected Aquaflux failure to re-equibrilate. Six lizards had spikes noted at the time of data collection that were caused by the AquaFlux seal being broken, and we removed those so they will not be considered in our data analyses. Of those six noted spike errors, three lizards had last remaining portion of data removed due to suspected Aquaflux failure to re-equibrilate.

Put Error-Removed Data Back

Data for lizards 403 and 405 will be removed completely, and the rest will have their cleaned-up data added back into the dataframe.

err_lizards <- c(403, 405, 406, 408, 411, 414, 426, 436)
filtered1_CEWL_data <- all_CEWL_data %>%
   dplyr::filter(lizard_ID %nin% err_lizards) %>%
   rbind(l406_filtered) %>%
   rbind(l408_filtered) %>%
   rbind(l411_filtered) %>%
   rbind(l414_filtered) %>%
   rbind(l426_filtered) %>%
   rbind(l436_filtered) #%>%
  #mutate(lizard_ID = as.factor(lizard_ID))

summary(filtered1_CEWL_data)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   :  0.0   Min.   : 0.03   Min.   :24.00   Min.   :21.10  
##  1st Qu.:218.9   1st Qu.: 7.86   1st Qu.:24.80   1st Qu.:32.30  
##  Median :441.0   Median :13.27   Median :25.20   Median :33.60  
##  Mean   :442.7   Mean   :13.76   Mean   :25.14   Mean   :34.36  
##  3rd Qu.:663.2   3rd Qu.:18.66   3rd Qu.:25.50   3rd Qu.:36.50  
##  Max.   :901.0   Max.   :37.40   Max.   :26.00   Max.   :48.10  
##                                                                 
##       date              lizard_ID     date_time_start                 
##  Min.   :2021-10-05   417    :  887   Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-11   409    :  886   1st Qu.:2021-10-11 15:11:30.00  
##  Median :2021-10-12   410    :  886   Median :2021-10-12 15:37:50.00  
##  Mean   :2021-10-13   412    :  886   Mean   :2021-10-14 02:09:35.76  
##  3rd Qu.:2021-10-18   413    :  886   3rd Qu.:2021-10-18 17:01:43.00  
##  Max.   :2021-10-19   415    :  886   Max.   :2021-10-19 16:30:20.00  
##                       (Other):26292                                   
##    date_time                     
##  Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-11 15:15:47.00  
##  Median :2021-10-12 15:44:59.00  
##  Mean   :2021-10-14 02:16:58.02  
##  3rd Qu.:2021-10-18 17:14:31.00  
##  Max.   :2021-10-19 16:45:20.00  
## 
unique(filtered1_CEWL_data$lizard_ID)
##  [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439

CEWL Equilibration

The evaporimeter also takes 1-2 minutes to equilibrate to the lizard at the start of each time series, so we want to estimate when equilibration was complete for each lizard.

ggplot(data = filtered1_CEWL_data,
       aes(x = time_sec,
           y = CEWL_g_m2h, 
           color = lizard_ID)) + 
   geom_point(size = 0.5, alpha = 0.5) + 
   geom_vline(xintercept = 60) + 
   geom_vline(xintercept = 120) +
   xlim(0, 200)
## Warning: Removed 24375 rows containing missing values (`geom_point()`).

# find max beginning value
start_peaks <- filtered1_CEWL_data %>%
   dplyr::filter(time_sec < 80) %>%
   group_by(lizard_ID) %>%
   mutate(peak_CEWL = max(CEWL_g_m2h)) %>%
   dplyr::filter(peak_CEWL == CEWL_g_m2h) %>%
  # dplyr::filter(lizard_ID != 428) %>% # I think this guy just KEPT INCREASING
   # BUT filtering out lizards RN makes for-loop not work
   group_by(lizard_ID) %>%
   summarise(peak_time = max(time_sec))
summary(start_peaks)
##    lizard_ID    peak_time    
##  401    : 1   Min.   : 6.10  
##  402    : 1   1st Qu.:20.40  
##  404    : 1   Median :24.40  
##  406    : 1   Mean   :27.09  
##  407    : 1   3rd Qu.:29.50  
##  408    : 1   Max.   :79.40  
##  (Other):31

Apply peak times to filter out evaporimeter equilibration, remove 60 seconds following peak CEWL during AquaFlux equibrilation to ensure Aquaflux reaches true equilibrium.

# initialize empty df
filtered2_df <- data.frame()

# lizard = lizard_ID & list = 
for (lizard in unique(filtered1_CEWL_data$lizard_ID)) {
   
   # get all data for each lizard
   this_lizard <- filtered1_CEWL_data %>%
      dplyr::filter(lizard_ID == lizard)
   
   # get time of beginning peak
   this_liz_peak <- start_peaks %>% 
      dplyr::filter(lizard_ID == lizard)
   
   # calculate n seconds after peak
   end <- floor(this_liz_peak$peak_time + 60) # +60 seconds to time of max peak
   
   # make a list of the seconds to remove
   remove <- c(0:end)
   
   # clean each lizard's data
   this_liz_cleaned <- this_lizard %>%
      dplyr::filter(floor(time_sec) %nin% remove)

   # append to df
   filtered2_df <- filtered2_df %>%
      rbind(this_liz_cleaned)
   
   # should compile themselves into filtered_df 
   # which will be ready to use after running this loop
}

Check that the dataframe was compiled with ALL the lizards and that the plot looks better now:

summary(filtered2_df)
##     time_sec       CEWL_g_m2h     msmt_temp_C    msmt_RH_percent
##  Min.   : 67.3   Min.   : 0.11   Min.   :24.10   Min.   :21.20  
##  1st Qu.:286.4   1st Qu.: 7.38   1st Qu.:24.80   1st Qu.:32.30  
##  Median :485.1   Median :13.00   Median :25.20   Median :33.60  
##  Mean   :487.6   Mean   :13.43   Mean   :25.13   Mean   :34.39  
##  3rd Qu.:686.5   3rd Qu.:18.29   3rd Qu.:25.50   3rd Qu.:36.50  
##  Max.   :901.0   Max.   :31.07   Max.   :26.00   Max.   :48.10  
##                                                                 
##       date              lizard_ID     date_time_start                 
##  Min.   :2021-10-05   407    :  819   Min.   :2021-10-05 13:21:12.00  
##  1st Qu.:2021-10-11   404    :  813   1st Qu.:2021-10-11 15:11:30.00  
##  Median :2021-10-12   412    :  809   Median :2021-10-12 15:37:50.00  
##  Mean   :2021-10-13   425    :  807   Mean   :2021-10-14 01:38:41.63  
##  3rd Qu.:2021-10-18   410    :  806   3rd Qu.:2021-10-18 17:01:43.00  
##  Max.   :2021-10-19   424    :  806   Max.   :2021-10-19 16:30:20.00  
##                       (Other):23545                                   
##    date_time                     
##  Min.   :2021-10-05 13:22:41.00  
##  1st Qu.:2021-10-11 15:16:21.00  
##  Median :2021-10-12 15:45:10.00  
##  Mean   :2021-10-14 01:46:48.80  
##  3rd Qu.:2021-10-18 17:13:48.00  
##  Max.   :2021-10-19 16:45:20.00  
## 
unique(filtered2_df$lizard_ID)
##  [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
ggplot(data = filtered2_df,
       aes(x = time_sec,
           y = CEWL_g_m2h,
           color = lizard_ID)) + 
   geom_point(size = 0.1, alpha = 0.5) + 
   geom_vline(xintercept = 60) + 
   geom_vline(xintercept = 120)

DF looks complete and relative change in CEWL looks much more reasonable now that the equilibration period was removed.

Calibrate Thermocouple Data

First, calculate and save regression curves:

# calculate regression curves
regression_a <- lm(data = calibration_data[calibration_data$lead == "A",],
                   temp_C ~ reference_temp) 
regression_b <- lm(data = calibration_data[calibration_data$lead == "B",],
                   temp_C ~ reference_temp)
regression_c <- lm(data = calibration_data[calibration_data$lead == "C",],
                   temp_C ~ reference_temp)
regression_d <- lm(data = calibration_data[calibration_data$lead == "D",],
                   temp_C ~ reference_temp)
regression_e <- lm(data = calibration_data[calibration_data$lead == "E",],
                   temp_C ~ reference_temp)
regression_f <- lm(data = calibration_data[calibration_data$lead == "F",],
                   temp_C ~ reference_temp)
regression_g <- lm(data = calibration_data[calibration_data$lead == "G",],
                   temp_C ~ reference_temp)
regression_h <- lm(data = calibration_data[calibration_data$lead == "H",],
                   temp_C ~ reference_temp)

# create df to store calibration equations
TC_calibration <- data.frame(TC_A = (coef(regression_a))) %>% 
   `rownames<-`(c("intercept", "slope"))
TC_calibration$TC_B <- coef(regression_b)
TC_calibration$TC_C <- coef(regression_c)
TC_calibration$TC_D <- coef(regression_d)
TC_calibration$TC_E <- coef(regression_e)
TC_calibration$TC_F <- coef(regression_f)
TC_calibration$TC_G <- coef(regression_g)
TC_calibration$TC_H <- coef(regression_h)
TC_calibration
##                TC_A      TC_B      TC_C      TC_D      TC_E      TC_F      TC_G
## intercept 1.6231526 1.6611910 1.5559682 1.7000154 1.6606398 1.6289434 3.1508939
## slope     0.9890007 0.9814998 0.9943169 0.9896303 0.9869329 0.9907762 0.9756173
##                TC_H
## intercept 1.8051208
## slope     0.9646041

Next, use calibration curves to correct the data we measured:

# this applies respective calibration equation 
# to all cloacal thermocouple data points of that proper thermocouple
# 2=slope & 1=intercept

calibrated_TC_data <- all_thermocouple_data %>%
      # do for cloacal thermocouple temps
   mutate(calibrated_cloacal_temp = case_when(
      thermocouple_cloacal == "A" ~ value_cloacal*TC_calibration$TC_A[2] +
                                           TC_calibration$TC_A[1],
      thermocouple_cloacal == "B" ~ value_cloacal*TC_calibration$TC_B[2] +
                                           TC_calibration$TC_B[1],
      thermocouple_cloacal == "C" ~ value_cloacal*TC_calibration$TC_C[2] +
                                           TC_calibration$TC_C[1],
      thermocouple_cloacal == "D" ~ value_cloacal*TC_calibration$TC_D[2] +
                                           TC_calibration$TC_D[1],
      thermocouple_cloacal == "E" ~ value_cloacal*TC_calibration$TC_E[2] +
                                           TC_calibration$TC_E[1],
      thermocouple_cloacal == "F" ~ value_cloacal*TC_calibration$TC_F[2] +
                                           TC_calibration$TC_F[1],
      thermocouple_cloacal == "G" ~ value_cloacal*TC_calibration$TC_G[2] +
                                           TC_calibration$TC_G[1],
      thermocouple_cloacal == "H" ~ value_cloacal*TC_calibration$TC_H[2] +
                                           TC_calibration$TC_H[1]
                                                ),
      # do for dorsal thermocouples also
      calibrated_dorsal_temp = case_when(
      thermocouple_dorsal == "A" ~ value_dorsal*TC_calibration$TC_A[2] +
                                           TC_calibration$TC_A[1],
      thermocouple_dorsal == "B" ~ value_dorsal*TC_calibration$TC_B[2] +
                                           TC_calibration$TC_B[1],
      thermocouple_dorsal == "C" ~ value_dorsal*TC_calibration$TC_C[2] +
                                           TC_calibration$TC_C[1],
      thermocouple_dorsal == "D" ~ value_dorsal*TC_calibration$TC_D[2] +
                                           TC_calibration$TC_D[1],
      thermocouple_dorsal == "E" ~ value_dorsal*TC_calibration$TC_E[2] +
                                           TC_calibration$TC_E[1],
      thermocouple_dorsal == "F" ~ value_dorsal*TC_calibration$TC_F[2] +
                                           TC_calibration$TC_F[1],
      thermocouple_dorsal == "G" ~ value_dorsal*TC_calibration$TC_G[2] +
                                           TC_calibration$TC_G[1],
      thermocouple_dorsal == "H" ~ value_dorsal*TC_calibration$TC_H[2] +
                                           TC_calibration$TC_H[1]
                                                )
                  )
summary(calibrated_TC_data)
##       date                 time                          lizard_ID    
##  Min.   :2021-10-05   Min.   :2023-01-12 13:17:44.00   406    : 1886  
##  1st Qu.:2021-10-11   1st Qu.:2023-01-12 14:31:25.25   407    : 1009  
##  Median :2021-10-12   Median :2023-01-12 15:22:32.00   404    : 1003  
##  Mean   :2021-10-12   Mean   :2023-01-12 15:26:16.27   436    :  976  
##  3rd Qu.:2021-10-18   3rd Qu.:2023-01-12 16:20:06.75   430    :  969  
##  Max.   :2021-10-19   Max.   :2023-01-12 17:48:51.00   425    :  964  
##                                                        (Other):28847  
##  value_cloacal   thermocouple_cloacal  value_dorsal   thermocouple_dorsal
##  Min.   :11.80   E      :9326         Min.   :12.50   A      :7651       
##  1st Qu.:20.60   D      :5778         1st Qu.:22.20   F      :6477       
##  Median :25.20   B      :5696         Median :25.30   D      :5667       
##  Mean   :24.51   F      :5595         Mean   :24.95   G      :3756       
##  3rd Qu.:27.40   C      :3633         3rd Qu.:27.30   E      :3740       
##  Max.   :38.00   A      :2820         Max.   :36.30   H      :3633       
##                  (Other):2806                         (Other):4730       
##    date_time                      calibrated_cloacal_temp
##  Min.   :2021-10-05 13:17:44.00   Min.   :13.86          
##  1st Qu.:2021-10-11 14:12:38.25   1st Qu.:22.04          
##  Median :2021-10-12 15:19:27.50   Median :26.54          
##  Mean   :2021-10-13 15:21:52.15   Mean   :25.88          
##  3rd Qu.:2021-10-18 17:01:30.75   3rd Qu.:28.80          
##  Max.   :2021-10-19 16:45:25.00   Max.   :38.96          
##                                                          
##  calibrated_dorsal_temp
##  Min.   :13.99         
##  1st Qu.:23.72         
##  Median :26.81         
##  Mean   :26.39         
##  3rd Qu.:28.72         
##  Max.   :37.59         
## 

Correct Thermocouple Times

The TC logger times were off by a few minutes compared to the evaporimeter, but we consistently started the evaporimeter immediately following the TC (with the exception of lizards 428, 437, 439 whose temperature data was started late and will be removed from the temperature dataset).

First, remove ~5 seconds from starting, then create time_sec variable:

TC_corrected_data <- calibrated_TC_data %>%
                # remove lizards with messed up data
   dplyr::filter(lizard_ID %nin% c(428, 437, 439)) %>% 
   group_by(lizard_ID) %>%
   mutate(min_time = min(date_time)) %>%
                # shift start time to more closely match that for CEWL
   dplyr::filter(date_time > (min_time + 4)) %>%
   group_by(lizard_ID) %>%
              # put time in the same format at CEWL data
   mutate(start_time = min(date_time),
          time_sec = as.numeric(date_time - start_time)) %>%
              # remove extra time for loggers stopped late, after the 15min period
  dplyr::filter(time_sec <= (15*60)) 

summary(TC_corrected_data)
##       date                 time                          lizard_ID    
##  Min.   :2021-10-05   Min.   :2023-01-12 13:17:49.00   401    :  901  
##  1st Qu.:2021-10-11   1st Qu.:2023-01-12 14:19:08.00   402    :  901  
##  Median :2021-10-12   Median :2023-01-12 15:17:36.50   403    :  901  
##  Mean   :2021-10-12   Mean   :2023-01-12 15:21:59.75   404    :  901  
##  3rd Qu.:2021-10-18   3rd Qu.:2023-01-12 16:14:05.75   406    :  901  
##  Max.   :2021-10-19   Max.   :2023-01-12 17:48:22.00   407    :  901  
##                                                        (Other):25228  
##  value_cloacal   thermocouple_cloacal  value_dorsal  thermocouple_dorsal
##  Min.   :11.80   E      :7208         Min.   :12.5   A      :6307       
##  1st Qu.:20.60   B      :5406         1st Qu.:22.2   D      :5406       
##  Median :25.20   D      :5406         Median :25.2   F      :5406       
##  Mean   :24.43   F      :5406         Mean   :24.9   E      :3604       
##  3rd Qu.:27.30   A      :2703         3rd Qu.:27.3   G      :3604       
##  Max.   :37.50   C      :1802         Max.   :35.9   B      :2703       
##                  (Other):2703                        (Other):3604       
##    date_time                      calibrated_cloacal_temp
##  Min.   :2021-10-05 13:17:49.00   Min.   :14.06          
##  1st Qu.:2021-10-11 14:16:05.25   1st Qu.:22.08          
##  Median :2021-10-12 14:55:29.00   Median :26.53          
##  Mean   :2021-10-13 11:07:52.71   Mean   :25.80          
##  3rd Qu.:2021-10-18 15:53:44.75   3rd Qu.:28.68          
##  Max.   :2021-10-19 16:24:08.00   Max.   :38.47          
##                                                          
##  calibrated_dorsal_temp    min_time                     
##  Min.   :13.99          Min.   :2021-10-05 13:17:44.00  
##  1st Qu.:23.82          1st Qu.:2021-10-11 14:08:30.00  
##  Median :26.89          Median :2021-10-12 14:47:54.00  
##  Mean   :26.38          Mean   :2021-10-13 11:00:17.71  
##  3rd Qu.:28.62          3rd Qu.:2021-10-18 15:46:10.00  
##  Max.   :37.05          Max.   :2021-10-19 16:09:03.00  
##                                                         
##    start_time                        time_sec  
##  Min.   :2021-10-05 13:17:49.00   Min.   :  0  
##  1st Qu.:2021-10-11 14:08:35.00   1st Qu.:225  
##  Median :2021-10-12 14:47:59.00   Median :450  
##  Mean   :2021-10-13 11:00:22.71   Mean   :450  
##  3rd Qu.:2021-10-18 15:46:15.00   3rd Qu.:675  
##  Max.   :2021-10-19 16:09:08.00   Max.   :900  
## 
hist(TC_corrected_data$time_sec)

Good, each lizard should have up to 900 seconds of data, which is what we see in the histogram.

Summary Stats

Relative CEWL

starting_CEWL <- filtered2_df %>%
   group_by(lizard_ID) %>%
   mutate(start_time = min(time_sec)) %>%
   dplyr::filter(time_sec == start_time) %>%
   dplyr::select(lizard_ID, starting_CEWL = CEWL_g_m2h)
head(starting_CEWL)
## # A tibble: 6 × 2
## # Groups:   lizard_ID [6]
##   lizard_ID starting_CEWL
##   <fct>             <dbl>
## 1 401               28.5 
## 2 402               25.5 
## 3 404               25.5 
## 4 407               28.3 
## 5 409               21.2 
## 6 410                7.58
summary(starting_CEWL)
##    lizard_ID  starting_CEWL  
##  401    : 1   Min.   : 4.64  
##  402    : 1   1st Qu.: 9.54  
##  404    : 1   Median :14.17  
##  406    : 1   Mean   :15.66  
##  407    : 1   3rd Qu.:21.24  
##  408    : 1   Max.   :28.52  
##  (Other):31

Join Dataframes

Lizards 416 (not logged at all), 428, 437, and 439 (logger started very late), do not have thermocouple data, so they should be in the CEWL_time_series dataframe, but not the temp_time_series dataframe.

Lizards 403 and 405 have too many errors so they should have been removed completely and will not show up in either of the dataframes below:

CEWL_time_series <- filtered2_df %>%
   mutate(time_sec = as.numeric(floor(time_sec))) %>%
   left_join(collection_data, 
             by = c('date', "lizard_ID" = "individual_ID")) %>%
   dplyr::select(time_sec, date,
                 lizard_ID, treatment,
                 mass_g, SVL_mm,
                 chamber_time_elapsed_hr,
                 CEWL_g_m2h, 
                 msmt_temp_C, msmt_RH_percent) %>%
   left_join(starting_CEWL, by = "lizard_ID") %>%
   mutate(relative_CEWL = CEWL_g_m2h - starting_CEWL,
          treatment = as.factor(treatment),
          time_min = time_sec/60) 
summary(CEWL_time_series)
##     time_sec          date              lizard_ID       treatment    
##  Min.   : 67.0   Min.   :2021-10-05   407    :  819   Control: 9452  
##  1st Qu.:286.0   1st Qu.:2021-10-11   404    :  813   Cooling: 8743  
##  Median :485.0   Median :2021-10-12   412    :  809   Heating:10210  
##  Mean   :487.2   Mean   :2021-10-13   425    :  807                  
##  3rd Qu.:686.0   3rd Qu.:2021-10-18   410    :  806                  
##  Max.   :901.0   Max.   :2021-10-19   424    :  806                  
##                                       (Other):23545                  
##      mass_g          SVL_mm     chamber_time_elapsed_hr   CEWL_g_m2h   
##  Min.   : 8.70   Min.   :60.0   Min.   :0.9333          Min.   : 0.11  
##  1st Qu.:10.00   1st Qu.:64.0   1st Qu.:1.0000          1st Qu.: 7.38  
##  Median :11.30   Median :66.0   Median :1.0333          Median :13.00  
##  Mean   :11.42   Mean   :66.3   Mean   :1.0991          Mean   :13.43  
##  3rd Qu.:12.60   3rd Qu.:69.0   3rd Qu.:1.1667          3rd Qu.:18.29  
##  Max.   :15.30   Max.   :75.0   Max.   :1.5333          Max.   :31.07  
##                                                                        
##   msmt_temp_C    msmt_RH_percent starting_CEWL   relative_CEWL    
##  Min.   :24.10   Min.   :21.20   Min.   : 4.64   Min.   :-18.000  
##  1st Qu.:24.80   1st Qu.:32.30   1st Qu.: 9.54   1st Qu.: -5.170  
##  Median :25.20   Median :33.60   Median :14.17   Median : -2.260  
##  Mean   :25.13   Mean   :34.39   Mean   :15.76   Mean   : -2.332  
##  3rd Qu.:25.50   3rd Qu.:36.50   3rd Qu.:21.24   3rd Qu.:  0.420  
##  Max.   :26.00   Max.   :48.10   Max.   :28.52   Max.   : 10.800  
##                                                                   
##     time_min     
##  Min.   : 1.117  
##  1st Qu.: 4.767  
##  Median : 8.083  
##  Mean   : 8.120  
##  3rd Qu.:11.433  
##  Max.   :15.017  
## 
unique(CEWL_time_series$lizard_ID)
##  [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
length(unique(CEWL_time_series$lizard_ID)) # 37
## [1] 37
temp_time_series <- CEWL_time_series %>%
   # filter out lizards with missing TC data
   dplyr::filter(lizard_ID %nin% c(428, 437, 439, 416)) %>%
   left_join(TC_corrected_data, 
             by = c('date', 'time_sec', "lizard_ID")) %>%
  dplyr::filter(complete.cases(calibrated_cloacal_temp, calibrated_dorsal_temp))
summary(temp_time_series)
##     time_sec          date              lizard_ID       treatment   
##  Min.   : 67.0   Min.   :2021-10-05   407    :  819   Control:8652  
##  1st Qu.:284.0   1st Qu.:2021-10-11   404    :  813   Cooling:8743  
##  Median :483.0   Median :2021-10-12   412    :  809   Heating:7856  
##  Mean   :485.6   Mean   :2021-10-13   410    :  806                 
##  3rd Qu.:684.0   3rd Qu.:2021-10-18   424    :  806                 
##  Max.   :900.0   Max.   :2021-10-19   425    :  806                 
##                                       (Other):20392                 
##      mass_g          SVL_mm      chamber_time_elapsed_hr   CEWL_g_m2h   
##  Min.   : 8.80   Min.   :60.00   Min.   :0.9333          Min.   : 0.11  
##  1st Qu.:10.30   1st Qu.:64.00   1st Qu.:1.0000          1st Qu.: 6.64  
##  Median :11.70   Median :66.00   Median :1.0167          Median :11.78  
##  Mean   :11.64   Mean   :66.68   Mean   :1.1032          Mean   :12.71  
##  3rd Qu.:12.70   3rd Qu.:69.00   3rd Qu.:1.1667          3rd Qu.:17.39  
##  Max.   :15.30   Max.   :75.00   Max.   :1.5333          Max.   :31.07  
##                                                                         
##   msmt_temp_C    msmt_RH_percent starting_CEWL   relative_CEWL    
##  Min.   :24.10   Min.   :21.20   Min.   : 4.64   Min.   :-18.000  
##  1st Qu.:24.80   1st Qu.:30.20   1st Qu.: 9.35   1st Qu.: -5.260  
##  Median :25.20   Median :33.60   Median :13.56   Median : -2.320  
##  Mean   :25.14   Mean   :34.37   Mean   :15.22   Mean   : -2.505  
##  3rd Qu.:25.50   3rd Qu.:36.50   3rd Qu.:21.23   3rd Qu.:  0.380  
##  Max.   :26.00   Max.   :48.10   Max.   :28.52   Max.   :  9.820  
##                                                                   
##     time_min           time                        value_cloacal  
##  Min.   : 1.117   Min.   :2023-01-12 13:19:18.00   Min.   :12.60  
##  1st Qu.: 4.733   1st Qu.:2023-01-12 14:17:04.00   1st Qu.:21.20  
##  Median : 8.050   Median :2023-01-12 15:14:24.00   Median :25.30  
##  Mean   : 8.093   Mean   :2023-01-12 15:20:19.41   Mean   :24.46  
##  3rd Qu.:11.400   3rd Qu.:2023-01-12 16:11:52.50   3rd Qu.:27.20  
##  Max.   :15.000   Max.   :2023-01-12 17:48:22.00   Max.   :37.50  
##                                                                   
##  thermocouple_cloacal  value_dorsal   thermocouple_dorsal
##  E      :6163         Min.   :14.60   A      :5363       
##  F      :4804         1st Qu.:22.40   D      :4780       
##  B      :4777         Median :25.20   F      :4694       
##  D      :4498         Mean   :24.77   G      :3201       
##  A      :2013         3rd Qu.:27.10   E      :2812       
##  H      :1473         Max.   :34.30   B      :2208       
##  (Other):1523                         (Other):2193       
##    date_time                      calibrated_cloacal_temp
##  Min.   :2021-10-05 13:19:18.00   Min.   :14.11          
##  1st Qu.:2021-10-11 14:47:47.50   1st Qu.:22.61          
##  Median :2021-10-12 15:15:18.00   Median :26.60          
##  Mean   :2021-10-13 15:45:59.15   Mean   :25.82          
##  3rd Qu.:2021-10-18 15:58:54.50   3rd Qu.:28.51          
##  Max.   :2021-10-19 16:24:08.00   Max.   :38.47          
##                                                          
##  calibrated_dorsal_temp    min_time                     
##  Min.   :16.06          Min.   :2021-10-05 13:17:44.00  
##  1st Qu.:24.14          1st Qu.:2021-10-11 14:42:50.00  
##  Median :26.93          Median :2021-10-12 15:07:39.00  
##  Mean   :26.28          Mean   :2021-10-13 15:37:48.54  
##  3rd Qu.:28.43          3rd Qu.:2021-10-18 15:46:10.00  
##  Max.   :35.66          Max.   :2021-10-19 16:09:03.00  
##                                                         
##    start_time                    
##  Min.   :2021-10-05 13:17:49.00  
##  1st Qu.:2021-10-11 14:42:55.00  
##  Median :2021-10-12 15:07:44.00  
##  Mean   :2021-10-13 15:37:53.54  
##  3rd Qu.:2021-10-18 15:46:15.00  
##  Max.   :2021-10-19 16:09:08.00  
## 
unique(temp_time_series$lizard_ID)
##  [1] 401 402 404 407 409 410 412 413 415 417 418 419 420 421 422 423 424 425 427
## [20] 429 430 431 432 433 434 435 438 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
# check for lizards that should be in CEWL data but not temp data
unique(CEWL_time_series$lizard_ID)[unique(CEWL_time_series$lizard_ID) %nin% unique(temp_time_series$lizard_ID)]
## [1] 416 428 437 439
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
length(unique(temp_time_series$lizard_ID)) # 33
## [1] 33

Mean Body Temps

starts <- temp_time_series %>%
  group_by(lizard_ID) %>%
  dplyr::filter(time_sec == min(time_sec))
starts %>% 
  group_by(treatment) %>%
  summarise(mean(calibrated_cloacal_temp, na.rm = T),
            sd(calibrated_cloacal_temp, na.rm = T),
            mean(calibrated_dorsal_temp, na.rm = T),
            sd(calibrated_dorsal_temp, na.rm = T))
## # A tibble: 3 × 5
##   treatment `mean(calibrated_cloacal_temp, na.rm = T)` sd(cali…¹ mean(…² sd(ca…³
##   <fct>                                          <dbl>     <dbl>   <dbl>   <dbl>
## 1 Control                                         27.0      1.33    27.2    1.00
## 2 Cooling                                         33.8      1.06    33.6    1.31
## 3 Heating                                         17.6      1.51    20.2    2.80
## # … with abbreviated variable names ¹​`sd(calibrated_cloacal_temp, na.rm = T)`,
## #   ²​`mean(calibrated_dorsal_temp, na.rm = T)`,
## #   ³​`sd(calibrated_dorsal_temp, na.rm = T)`
ends <- temp_time_series %>%
  group_by(lizard_ID) %>%
  dplyr::filter(time_sec == max(time_sec))
ends %>% 
  group_by(treatment) %>%
  summarise(mean(calibrated_cloacal_temp, na.rm = T),
            sd(calibrated_cloacal_temp, na.rm = T),
            mean(calibrated_dorsal_temp, na.rm = T),
            sd(calibrated_dorsal_temp, na.rm = T))
## # A tibble: 3 × 5
##   treatment `mean(calibrated_cloacal_temp, na.rm = T)` sd(cali…¹ mean(…² sd(ca…³
##   <fct>                                          <dbl>     <dbl>   <dbl>   <dbl>
## 1 Control                                         28.1      1.25    28.2    1.05
## 2 Cooling                                         18.1      2.91    20.1    2.73
## 3 Heating                                         34.3      2.33    31.3    3.11
## # … with abbreviated variable names ¹​`sd(calibrated_cloacal_temp, na.rm = T)`,
## #   ²​`mean(calibrated_dorsal_temp, na.rm = T)`,
## #   ³​`sd(calibrated_dorsal_temp, na.rm = T)`

CEWL per Body Temp per Tmt

I want to know what the typical CEWL value was at a given temperature, on average for lizards within treatment groups.

# first aggregate all body temps to the nearest degree, by indiv, then by tmt
mean_CEWL_temp <- temp_time_series %>%
  # round temp to nearest degree
  mutate(rounded_cal_c_temp = round(calibrated_cloacal_temp, 0)) %>%
  # avg each individual at each degree
  group_by(lizard_ID, treatment, rounded_cal_c_temp) %>%
  summarise(mean_CEWL = mean(CEWL_g_m2h)) %>%
  # avg each tmt group at each degree
  group_by(treatment, rounded_cal_c_temp) %>%
  summarise(mean_mean_CEWL = mean(mean_CEWL))
## `summarise()` has grouped output by 'lizard_ID', 'treatment'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'treatment'. You can override using the
## `.groups` argument.
# get values for heating group only
heat_CEWL <- mean_CEWL_temp %>%
  dplyr::filter(treatment == "Heating") %>%
  dplyr::select(treatment, rounded_cal_c_temp, heating_mm_CEWL = mean_mean_CEWL)
write_rds(heat_CEWL, "./Data/CEWL_by_temp_heating_tmt.csv")

# get values for cooling group only
cool_CEWL <- mean_CEWL_temp %>%
  dplyr::filter(treatment == "Cooling")%>%
  dplyr::select(treatment, rounded_cal_c_temp, cooling_mm_CEWL = mean_mean_CEWL)
write_rds(cool_CEWL, "./Data/CEWL_by_temp_cooling_tmt.csv")

# calculate the diff between groups at a common temperature
compare_CEWL <- heat_CEWL %>%
  left_join(cool_CEWL, by = 'rounded_cal_c_temp') %>%
  mutate(diff = heating_mm_CEWL - cooling_mm_CEWL) %>%
  dplyr::filter(complete.cases(diff)) 

# summary stats
compare_CEWL %>%
  summarise(mean_CEWL_diff = mean(diff), 
            sd_CEWL_diff = sd(diff), 
            max_CEWL_diff = max(diff), 
            min_CEWL_diff = min(diff)) %>%
  mutate(sem_CEWL_diff = sd_CEWL_diff/sqrt(nrow(compare_CEWL)))
## # A tibble: 1 × 5
##   mean_CEWL_diff sd_CEWL_diff max_CEWL_diff min_CEWL_diff sem_CEWL_diff
##            <dbl>        <dbl>         <dbl>         <dbl>         <dbl>
## 1           10.3         1.70          12.3          6.26         0.379

VPD at CEWL Measurement

We can calculate the VPD from the RH and temp recorded by the evaporimeter during the measurements we took.

VPD_at_CEWL <- CEWL_time_series %>%
  # calculate VPD for each temp/RH recording
  mutate(e_s_kPa = 0.611 * exp((17.502*msmt_temp_C)/(msmt_temp_C + 240.97)),
         VPD_kPa = e_s_kPa*(1 - (msmt_RH_percent/100)),
         date = as.factor(date)) %>%
  # we want mean VPD experienced for each lizard
  group_by(lizard_ID, date, treatment) %>%
  summarise(VPD_at_msmt = mean(VPD_kPa))
## `summarise()` has grouped output by 'lizard_ID', 'date'. You can override using
## the `.groups` argument.
# now calculate mean sd, and range for dates and treatments
VPD_at_CEWL %>%
  group_by(date) %>%
  summarise(VPD_mean = mean(VPD_at_msmt),
            VPD_sd = sd(VPD_at_msmt),
            VPD_min = min(VPD_at_msmt),
            VPD_max = max(VPD_at_msmt))
## # A tibble: 5 × 5
##   date       VPD_mean VPD_sd VPD_min VPD_max
##   <fct>         <dbl>  <dbl>   <dbl>   <dbl>
## 1 2021-10-05     1.72 0.0246    1.69    1.74
## 2 2021-10-11     2.18 0.0398    2.14    2.28
## 3 2021-10-12     2.37 0.0597    2.27    2.42
## 4 2021-10-18     2.15 0.0252    2.11    2.18
## 5 2021-10-19     1.98 0.0760    1.90    2.10
VPD_at_CEWL %>%
  group_by(treatment) %>%
  summarise(VPD_mean = mean(VPD_at_msmt),
            VPD_sd = sd(VPD_at_msmt),
            VPD_min = min(VPD_at_msmt),
            VPD_max = max(VPD_at_msmt))
## # A tibble: 3 × 5
##   treatment VPD_mean VPD_sd VPD_min VPD_max
##   <fct>        <dbl>  <dbl>   <dbl>   <dbl>
## 1 Control       2.08  0.214    1.70    2.37
## 2 Cooling       2.11  0.198    1.73    2.41
## 3 Heating       2.08  0.246    1.69    2.42
# and overall mean sd, and range
mean(VPD_at_CEWL$VPD_at_msmt)
## [1] 2.091736
sd(VPD_at_CEWL$VPD_at_msmt)
## [1] 0.2152626
min(VPD_at_CEWL$VPD_at_msmt)
## [1] 1.689619
max(VPD_at_CEWL$VPD_at_msmt)  
## [1] 2.422628

Export RDS files

We export the wrangled dataframes as RDS files so they save the R formatting we created thoughout this file. If we saved them as csv’s, we would have to reformat them every time we read them into a new Rmd.

write_rds(temp_time_series, "Data/temp_time_series.RDS")
write_rds(CEWL_time_series, "Data/CEWL_time_series.RDS")
write_rds(collection_data, "Data/collection_dat_formatted.RDS")